#include "StdAfx.h"
#include "PolicyTreeBuilder.h"

//////////////////////////////////////////////////////////////////////////
//Author	:	Ross Conroy ross.conroy@tees.ac.uk
//Date		:	14/08/2014
//
//Converts an input DID into a policy tree using the following algorithm
//root node = a1 best action
//root node observations = o2 observations
//
//For Each observation o2
//	Set Evidence o2 to observation
//	node = a2 best action
//End For
//
//t = 3
//
//For Each leafNode
//	For Each observation ot
//		clear domain
//		set domain actions and observations to parent history
//		new leaf for observation = best action at
//	End For
//End For
//////////////////////////////////////////////////////////////////////////
//Default constructor; the builder keeps no state of its own — all work
//happens inside ConvertToPolicyTree and its helpers.
PolicyTreeBuilder::PolicyTreeBuilder(void)
{
}

//Converts a compiled DID into a policy tree (see algorithm in the file
//header). Node names are formed as <prefix><timeStep>, e.g. "a1", "o2".
//
//inputDID           - the influence diagram to convert; evidence is set and
//                     retracted on it repeatedly during the build
//actionPrefix       - name prefix of the decision nodes in the DID
//observationPrefix  - name prefix of the observation (chance) nodes
//
//Returns the root of the newly built policy tree. The tree is heap
//allocated; ownership passes to the caller via the returned root node.
PolicyNode * PolicyTreeBuilder::ConvertToPolicyTree(Domain * inputDID, string actionPrefix, string observationPrefix)
{
	//Compile the DID so expected utilities and beliefs can be queried
	inputDID->compile();

	ExpandTimeSteps expander;
	int firstStep = expander.FindFirstTimeStep(inputDID);

	//Locate the first decision node (a<firstStep>) and the observation
	//node that follows it (o<firstStep + 1>)
	stringstream ss;
	ss << actionPrefix << firstStep;
	DiscreteDecisionNode * rootDecisionNode = (DiscreteDecisionNode*)inputDID->getNodeByName(ss.str());
	ss.str("");
	ss.clear();
	ss << observationPrefix << (firstStep + 1);
	DiscreteChanceNode * rootObservationNode = (DiscreteChanceNode*)inputDID->getNodeByName(ss.str());	
	ss.str("");
	ss.clear();

	//get list of possible observations branching out of the root
	list<string> observations;
	for(int i = 0; i < rootObservationNode->getNumberOfStates(); i++)
	{
		observations.push_back(rootObservationNode->getStateLabel(i));
	}

	//Root of the policy tree: the best first action with one branch per
	//first observation
	PolicyNode * rootNode = new PolicyNode(GetBestDecision(rootDecisionNode), observations, NULL);
	list<PolicyNode*> nodes;
	nodes.push_back(rootNode);

	//Repeatedly scan for an unexpanded leaf. The "&& done" loop condition
	//stops the scan after expanding a single leaf, so every expansion
	//starts from a freshly retracted domain before its evidence is set.
	bool done = false;
	while(!done)
	{
		done = true;
		inputDID->retractFindings();
		for(list<PolicyNode*>::iterator it = nodes.begin(); (it != nodes.end()) && done; it++)
		{
			PolicyNode * node = *it;
			if(node->IsLeafNode())
			{
				if(!node->IsFinalNode())
				{
					//get past branch history and enter it as evidence
					done = false;
					int timeStep = node->GetTimeStep() + 1;
					list<string> parentHistory = node->GetParentsCombinations(NULL);

					SetDomainHistory(inputDID, parentHistory, observationPrefix, actionPrefix);
					inputDID->propagate();

					//get nodes for next relevant time step
					ss << actionPrefix << timeStep;
					DiscreteDecisionNode * decisionNode = (DiscreteDecisionNode*)inputDID->getNodeByName(ss.str());
					ss.str("");
					ss.clear();
					ss << observationPrefix << timeStep;					
					DiscreteChanceNode * observationNode = (DiscreteChanceNode*)inputDID->getNodeByName(ss.str());	
					ss.str("");
					ss.clear();
					
					//observation node of the step after this one; NULL once the
					//horizon of the DID has been reached
					ss << observationPrefix << (timeStep + 1);	
					DiscreteChanceNode * nextObservationNode = (DiscreteChanceNode*)inputDID->getNodeByName(ss.str());	
					ss.str("");
					ss.clear();
					list<string> tmpObs;
					
					//if not the end of DID, collect the observation labels the
					//new child nodes will branch on
					if(nextObservationNode != NULL)
					{
						for(int i = 0; i < nextObservationNode->getNumberOfStates(); i++)
						{
							tmpObs.push_back(nextObservationNode->getStateLabel(i));
						}
					}

					//condition on each observation in turn and attach a child
					//node holding the best action under that observation
					for(int i = 0; i < observationNode->getNumberOfStates(); i ++)
					{
						observationNode->selectState(i);
						inputDID->propagate();
						string decision = GetBestDecision(decisionNode);
						PolicyNode * tmpNode = new PolicyNode(decision, tmpObs, node);
						node->SetNodeForObservation(observationNode->getStateLabel(i), tmpNode);
						nodes.push_back(tmpNode);
					}
				}
			}
		}
	}	

	return rootNode;
}

//gets the decision with the maximum expected utility
//gets the decision with the maximum expected utility
//
//node - the decision node to inspect (must not be NULL)
//
//Returns the state label of the decision state with the highest expected
//utility; ties keep the earliest state. Returns an empty string if the
//node has no states.
string PolicyTreeBuilder::GetBestDecision(DiscreteDecisionNode* node)
{
	double maxUtil = 0.0;
	string maxDecision;
	for(int i = 0; i < node->getNumberOfStates(); i++)
	{
		//query the utility once per state instead of twice
		double util = node->getExpectedUtility(i);
		if(i == 0 || util > maxUtil)
		{
			maxUtil = util;
			maxDecision = node->getStateLabel(i);
		}
	}

	return maxDecision;
}

//sets the history of a domain
//sets the history of a domain
//
//Walks an alternating action/observation history (a1, o2, a2, o3, ...) and
//enters each entry as evidence by selecting the matching state on the
//correspondingly named node of the DID.
//
//inputDID          - the domain to set evidence on
//history           - alternating action/observation state labels
//observationPrefix - name prefix of the observation nodes
//actionPrefix      - name prefix of the decision nodes
//
//Returns the same domain pointer for convenience.
Domain * PolicyTreeBuilder::SetDomainHistory(Domain * inputDID, list<string> history, string observationPrefix, string actionPrefix)
{
	int timeStep = 1;
	bool isAction = true;//history entries alternate, starting with an action

	for(list<string>::iterator entry = history.begin(); entry != history.end(); entry++)
	{
		//build the node name for this position in the history
		stringstream nameStream;
		if(isAction)
		{
			nameStream << actionPrefix << timeStep;
		}
		else
		{
			//observations are attached to the following time step
			timeStep++;
			nameStream << observationPrefix << timeStep;
		}

		//look the node up and select the state named by the history entry
		DiscreteNode * node = (DiscreteNode*)inputDID->getNodeByName(nameStream.str());
		node->selectState(node->getStateIndex(*entry));

		isAction = !isAction;
	}

	return inputDID;
}
